<<<<<<< HEAD ======= >>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5 ERCC <<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Aim

To assess the quality of whole transcriptomic Iso-Seq runs using ERCC as benchmark
- Number of reads mapped to ERCC (difference between WT and TG?)
- Correlation of ERCC known concentration and FL Reads per ERCC detected
- Features of those ERCCs not detected
- Additional: saturation of detecting ERCCs

======= ERCC_conc <- read.csv("/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/ONT/ERCC/ERCC_calculations.csv", header = T)[-1,]
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Chain: Number of Reads

Pipeline

This was performed by merging only WT (n = 6), only TG (n = 6) and all the samples (n = 12) together at Isoseq3 Cluster, align to ERCC, cupcake collapse and chain three datasets, followed by SQANTI.

Pipeline for analysis

Pipeline for analysis

Classification file

======= class <- read.table("/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/All_Samples/ERCC/SQANTI/all_samples.chained_classification.filtered_lite_classification.txt", header = T, as.is = T, sep = "\t") datatable(head(class), options = list(scrollX = TRUE))
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Redundant Isoforms from Chained Data

Plot is drawn from All_Merged dataset in Sqanti chained output

<<<<<<< HEAD
=======
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

All: Number of Reads

Pipeline

Pipeline for analysis

Pipeline for analysis

Classification file

======= class <- read.table("/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/All_Samples/ERCC/SQANTIAll/All_Merged.collapsed.filtered_classification.filtered_lite_classification.txt", header = T, as.is = T, sep = "\t") datatable(head(class), options = list(scrollX = TRUE))
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Redundant Isoforms

<<<<<<< HEAD
=======
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Demultiplex: Number of Reads

This was performed by merging only WT (n = 6), only TG (n = 6) and all the samples (n = 12) together at Isoseq3 Cluster, align to ERCC, cupcake collapse, demultiplex, followed by SQANTI.
Note: only 10 samples had ERCCs

Pipeline

Pipeline for analysis

Pipeline for analysis

Classification file

<<<<<<< HEAD
=======
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Unique ERCC

## Total unique ERCCs in both WT and TG: 37 (40.22%)
##   Phenotype Mean_ERCC_Detected  perc
## 1        TG               32.2 35.00
## 2        WT               32.4 35.22
<<<<<<< HEAD

=======

>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Redundant Isoforms

<<<<<<< HEAD
=======
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5
## Number of ERCC with more than one isoform 8 ( 8.7 %)

+ TAMA

To use TAMA’s tama_remove_fragment_models.py to remove shorter redundant “isoforms” from the same ERCCs. Script is applied after SQANTI filtering.

<<<<<<< HEAD
TAMA <- "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/OLD_2020/All_Samples/ERCC/TOFU/TAMA_Filter/"
=======
TAMA <- "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/All_Samples/ERCC/TOFU/TAMA_Filter/"
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5
tama_retained <- read.table(paste0(TAMA,"All_Merged_postsqanti.bed")) %>% mutate(TAMA = "Retained") 
tama_discard <- read.table(paste0(TAMA,"All_Merged_postsqanti_discarded.txt")) %>% mutate(TAMA = "Discarded")
tama <- bind_rows(tama_retained, tama_discard) %>% mutate(isoform = word(.$V4, c(2),sep = ";"))

p9 <- tama %>% group_by(V1,TAMA) %>% tally() %>%
  ggplot(., aes(x = reorder(V1,-n), y = n, fill = TAMA)) + geom_bar(stat = "identity") + 
  labs(y = "Number of Isoforms", x = "", title = "\n\n") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.title = element_blank(), legend.position = c(0.9, 0.9)) +
  mytheme 

# reads filtered from TAMA 
# total FL reads across 10 samples associated with isoform 
p10 <- class[class$chrom %in% tama_discard$V1,] %>%
  mutate(Total_FL = .[, 46:55] %>% rowSums(na.rm = TRUE)) %>%
  left_join(., tama, by = ("isoform")) %>% 
  mutate(log10_Total_FL = log10(Total_FL)) %>%
  ggplot(., aes(x = chrom, y = log10_Total_FL, colour = TAMA)) + geom_point() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme + labs(y = "Full-Length Reads (Log10)", x = "", title = "\n\n") 

p11 <- class %>%
  mutate(Total_FL = .[, 46:55] %>% rowSums(na.rm = TRUE)) %>%
  left_join(., tama, by = ("isoform")) %>% 
  mutate(log10_Total_FL = log10(Total_FL)) %>%
  ggplot(., aes(x = chrom, y = log10_Total_FL, colour = TAMA)) + geom_point() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme + labs(y = "Full-Length Reads (Log10)", x = "") 

p9 

Correlation

## 
##  Pearson's product-moment correlation
## 
## data:  dat[[x.var]] and dat[[y.var]]
## t = 8.5057, df = 35, p-value = 4.89e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6770173 0.9043953
## sample estimates:
##       cor 
## 0.8209479 
## 
## [1] ""
## [1] "Correlation betweenlog2_amount_of_ERCCandlog2_FL_reads"
## [1] "corr.value0.820947870699425"
## [1] "p.value4.89015691673981e-10"

Mapping: Filtered ERCCs

Number of reads mapped

Merged

<<<<<<< HEAD
All_Samples_dir <- "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/OLD_2020/All_Samples/"
=======
All_Samples_dir <- "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/All_Samples/"
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

# ERCC_mapped, ERCC_unmapped, Mouse all, Mouse mapped, mouse unmapped
Merged_mapping <- list(
  ## ERCC
  read.table(paste0(All_Samples_dir,"/ERCC/MAPPING/All_Merged_reads_with_alignment_statistics.txt")),
  read.table(paste0(All_Samples_dir, "/ERCC/MAPPING/All_Merged.notread.clustered.hq.fastq.paf"), as.is = T, sep = "\t") %>%
    mutate(transcript = word(V1, c(1), sep = fixed(" "))),
  
  ## Mouse 
  read.table(paste0(All_Samples_dir,"/MOUSE/MAPPING/PAF/All_Merged.allread.clustered.hq.fastq.paf"), as.is = T, sep = "\t") %>%
    mutate(transcript = word(V1, c(1), sep = fixed(" "))),  
  read.table(paste0(All_Samples_dir, "/MOUSE/MAPPING/PAF/All_Merged_reads_with_alignment_statistics.txt"), as.is = T, sep = "\t") %>%
    mutate(transcript = word(V1, c(1), sep = fixed(" "))),
  read.table(paste0(All_Samples_dir,"/MOUSE/MAPPING/PAF/All_Merged.notread.clustered.hq.fastq.paf"), as.is = T, sep = "\t")
) 

names(Merged_mapping) <- c("ERCC_mapped","ERCC_unmapped","Mouse_all","Mouse_mapped","Mouse_unmapped")

#total_num
# number of total FL reads being mapped
# number of mapped ERCC reads  
# number of mapped mouse reads
# number of unmapped reads

total_num <- data.frame("total_fl" = nrow(Merged_mapping$Mouse_all),
                        "ERCC_mapped" = nrow(Merged_mapping$ERCC_mapped),
                        "Mouse_mapped" = nrow(Merged_mapping$Mouse_mapped),
                        "Mouse_unmapped" = length(setdiff(Merged_mapping$Mouse_unmapped$V1,Merged_mapping$ERCC_mapped$V1)))

total_num <- total_num %>% melt() %>% mutate(perc = value/total_num$total_fl * 100) %>%
  filter(variable != "total_fl") %>% 
  mutate(sample = "Merged") %>% 
  mutate(phenotype = "Merged") %>% 
  mutate(all_reads = total_num$total_fl) %>%
  select(sample, phenotype, variable, value, all_reads) %>%
  `colnames<-`(c("sample", "phenotype","type", "total_reads", "all_reads")) %>% 
  mutate(perc = total_reads/all_reads * 100) %>%
<<<<<<< HEAD
  mutate(dataset = "merged")

total_num
##   sample phenotype           type total_reads all_reads       perc dataset
## 1 Merged    Merged    ERCC_mapped         365    277968  0.1313101  merged
## 2 Merged    Merged   Mouse_mapped      276035    277968 99.3045962  merged
## 3 Merged    Merged Mouse_unmapped        1568    277968  0.5640937  merged

Individual

############# Individual
Individual_Mouse_Mapping_dir = "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/OLD_2020/Individual/MAPPING"
Individual_ERCC_Mapping_dir = "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/OLD_2020/Individual/ERCC_MAPPING"

Individual_Mapping <- list(
  # ERCC 
  # do not include Q21 and O18 as did not use ERCCs
  lapply(grep(list.files(path = Individual_ERCC_Mapping_dir, pattern = "reads_with_alignment_statistics.txt", full.names = T),
                     pattern=c("Q21|O18"), invert=TRUE, value=TRUE),
       function(x) read.table(x,as.is = T, sep = "\t") %>% 
         mutate(transcript = word(V1, c(1), sep = fixed(" ")))),
  
  # Mouse
  lapply(list.files(path = Individual_Mouse_Mapping_dir, pattern = "allread.clustered.hq.fastq.paf", full.names = T),
       function(x) read.table(x,as.is = T, sep = "\t") %>% 
         mutate(transcript = word(V1, c(1), sep = fixed(" ")))),
  lapply(list.files(path = Individual_Mouse_Mapping_dir, pattern = "reads_with_alignment_statistics.txt", full.names = T),
       function(x) read.table(x,as.is = T, sep = "\t") %>% mutate(transcript = word(V1, c(1), sep = fixed(" ")))),
  lapply(list.files(path = Individual_Mouse_Mapping_dir, pattern = "notread.clustered.hq.fastq.paf", full.names = T),
       function(x) read.table(x,as.is = T, sep = "\t"))
)
names(Individual_Mapping) <- c("ERCC_mapped","all","mapped","unmapped")
# the sample name input into each list is the same order (alphabetical)
names(Individual_Mapping$ERCC_mapped) <- grep(list.files(path = Individual_ERCC_Mapping_dir, pattern = "reads_with_alignment_statistics.txt"),pattern=c("Q21|O18"), invert=TRUE, value=TRUE)
names(Individual_Mapping$all) <- list.files(path = Individual_Mouse_Mapping_dir, pattern = "allread.clustered.hq.fastq.paf")
names(Individual_Mapping$mapped) <- list.files(path = Individual_Mouse_Mapping_dir, pattern = "reads_with_alignment_statistics.txt")
names(Individual_Mapping$unmapped) <- list.files(path = Individual_Mouse_Mapping_dir, pattern = "notread.clustered.hq.fastq.paf")

individual_total_num <- bind_rows(
  ldply(Individual_Mapping$ERCC_mapped, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed("_"))) %>% 
         mutate(type = "ERCC_mapped"), 
  ldply(Individual_Mapping$mapped, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed("_"))) %>%
         mutate(type = "Mouse_mapped"),
  ldply(Individual_Mapping$unmapped, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed("."))) %>%
         mutate(type = "Mouse_unmapped")
)

individual_total_reads <- ldply(Individual_Mapping$all, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed(".")))
individual_total_num <- individual_total_num %>% 
  mutate(phenotype = ifelse(.$sample %in% c("O18","K18","S18","L22","Q20","K24"), "TG","WT")) %>%
  left_join(., individual_total_reads, by = "sample") %>%
  select(sample, phenotype, type, V1.x, V1.y) %>%
  `colnames<-`(c("sample", "phenotype","type", "total_reads", "all_reads")) %>%
  mutate(perc = total_reads/all_reads * 100) %>% 
  mutate(dataset = "individual")


all_num <- bind_rows(individual_total_num, total_num)

plot_mapping <- function(dat){
  p <- ggplot(dat, aes(x = phenotype, y = perc, fill = phenotype)) + geom_boxplot() +
    geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(0.7), dotsize = 0.5) + 
    mytheme + labs(x = "", y = "Full-Length clustered reads (%)", title = "\n\n") +
    scale_fill_manual(values = c(label_colour("TG"),label_colour("WT"),label_colour("Merged"))) + 
    theme(legend.position = "none",
          strip.background = element_blank(),
          strip.text.x = element_blank()) +
    facet_grid(~dataset,scales = "free_x")
  
  return(p)
}

plot_mapping(all_num %>% filter(type == "Mouse_mapped")) 

======= mutate(dataset = "merged")

Individual

############# Individual
Individual_Mouse_Mapping_dir = "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/Individual/MAPPING"
Individual_ERCC_Mapping_dir = "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/Individual/ERCC_MAPPING"

Individual_Mapping <- list(
  # ERCC 
  # do not include Q21 and O18 as did not use ERCCs
  lapply(grep(list.files(path = Individual_ERCC_Mapping_dir, pattern = "reads_with_alignment_statistics.txt", full.names = T),
                     pattern=c("Q21|O18"), invert=TRUE, value=TRUE),
       function(x) read.table(x,as.is = T, sep = "\t") %>% 
         mutate(transcript = word(V1, c(1), sep = fixed(" ")))),
  
  # Mouse
  lapply(list.files(path = Individual_Mouse_Mapping_dir, pattern = "allread.clustered.hq.fastq.paf", full.names = T),
       function(x) read.table(x,as.is = T, sep = "\t") %>% 
         mutate(transcript = word(V1, c(1), sep = fixed(" ")))),
  lapply(list.files(path = Individual_Mouse_Mapping_dir, pattern = "reads_with_alignment_statistics.txt", full.names = T),
       function(x) read.table(x,as.is = T, sep = "\t") %>% mutate(transcript = word(V1, c(1), sep = fixed(" ")))),
  lapply(list.files(path = Individual_Mouse_Mapping_dir, pattern = "notread.clustered.hq.fastq.paf", full.names = T),
       function(x) read.table(x,as.is = T, sep = "\t"))
)
names(Individual_Mapping) <- c("ERCC_mapped","all","mapped","unmapped")
# the sample name input into each list is the same order (alphabetical)
names(Individual_Mapping$ERCC_mapped) <- grep(list.files(path = Individual_ERCC_Mapping_dir, pattern = "reads_with_alignment_statistics.txt"),pattern=c("Q21|O18"), invert=TRUE, value=TRUE)
names(Individual_Mapping$all) <- list.files(path = Individual_Mouse_Mapping_dir, pattern = "allread.clustered.hq.fastq.paf")
names(Individual_Mapping$mapped) <- list.files(path = Individual_Mouse_Mapping_dir, pattern = "reads_with_alignment_statistics.txt")
names(Individual_Mapping$unmapped) <- list.files(path = Individual_Mouse_Mapping_dir, pattern = "notread.clustered.hq.fastq.paf")

individual_total_num <- bind_rows(
  ldply(Individual_Mapping$ERCC_mapped, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed("_"))) %>% 
         mutate(type = "ERCC_mapped"), 
  ldply(Individual_Mapping$mapped, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed("_"))) %>%
         mutate(type = "Mouse_mapped"),
  ldply(Individual_Mapping$unmapped, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed("."))) %>%
         mutate(type = "Mouse_unmapped")
)

individual_total_reads <- ldply(Individual_Mapping$all, function(x) nrow(x)) %>% mutate(sample = word(.id, c(1), sep = fixed(".")))
individual_total_num <- individual_total_num %>% 
  mutate(phenotype = ifelse(.$sample %in% c("O18","K18","S18","L22","Q20","K24"), "TG","WT")) %>%
  left_join(., individual_total_reads, by = "sample") %>%
  select(sample, phenotype, type, V1.x, V1.y) %>%
  `colnames<-`(c("sample", "phenotype","type", "total_reads", "all_reads")) %>%
  mutate(perc = total_reads/all_reads * 100) %>% 
  mutate(dataset = "individual")


all_num <- bind_rows(individual_total_num, total_num)

plot_mapping <- function(dat){
  p <- ggplot(dat, aes(x = phenotype, y = perc, fill = phenotype)) + geom_boxplot() +
    geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(0.7), dotsize = 0.5) + 
    mytheme + labs(x = "", y = "Full-Length clustered reads (%)", title = "\n\n") +
    scale_fill_manual(values = c(label_colour("TG"),label_colour("WT"),label_colour("Merged"))) + 
    theme(legend.position = "none",
          strip.background = element_blank(),
          strip.text.x = element_blank()) +
    facet_grid(~dataset,scales = "free_x")
  
  return(p)
}

plot_mapping(all_num %>% filter(type == "Mouse_mapped")) 

>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Alignment vs Identity of all mapped ERCCs

Threshold:
minimum alignment coverage = 0.95 (parameter set from discarded ERCC)
minimum alignment identity = 0.95

<<<<<<< HEAD
## Total unique ERCCs in both WT and TG: 58 (63.04%)
=======
## Total unique ERCCs in both WT and TG: 58 (63.04%)
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Post Filtering

<<<<<<< HEAD
## Total unique ERCCs in both WT and TG (post filtering on length and identity): 57 (61.96%)

=======
## Total unique ERCCs in both WT and TG (post filtering on length and identity): 57 (61.96%)

>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Discarded ERCC

Rationale for discarded ERCC (1 isoform) from cupcake collapse
Cupcake’s parameters: minimum alignment coverage = 0.99, minimum alignment identity = 0.95

<<<<<<< HEAD

=======

>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Readjustment of Demultiplex

Unique ERCC

Reduced cupcake’s collapse default parameters (0.99) to 0.95

<<<<<<< HEAD
## Total unique ERCCs in both WT and TG: 57 (61.96%)

Redundant Isoforms

Isoforms vs Concentration

=======
## Total unique ERCCs in both WT and TG: 57 (61.96%)

Redundant Isoforms

+ TAMA

To use TAMA’s tama_remove_fragment_models.py to remove shorter redundant “isoforms” from the same ERCCs. Script is applied after SQANTI filtering.

<<<<<<< HEAD
TAMA <- "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/OLD_2020/All_Samples/ERCC/TOFU_ADJUST/TAMA_Filter/"
tama_retained_adjusted <- read.table(paste0(TAMA,"All_Merged_postsqanti.bed")) %>% mutate(TAMA = "Retain") 
tama_discard <- read.table(paste0(TAMA,"All_Merged_postsqanti_discarded.txt")) %>% mutate(TAMA = "Discard")
tama <- bind_rows(tama_retained_adjusted, tama_discard) %>% mutate(isoform = word(.$V4, c(2),sep = ";"))

p19 <- tama %>% group_by(V1,TAMA) %>% tally() %>%
  ggplot(., aes(x = reorder(V1,-n), y = n, fill = TAMA)) + geom_bar(stat = "identity") + labs(y = "Number of Isoforms", x = "ERCC") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme

# reads filtered from TAMA 
# total FL reads across 10 samples associated with isoform 
p20 <- class[class$chrom %in% tama_discard$V1,] %>%
  mutate(Total_FL = .[, 46:55] %>% rowSums(na.rm = TRUE)) %>%
  left_join(., tama, by = ("isoform")) %>% 
  mutate(log10_Total_FL = log10(Total_FL)) %>%
  ggplot(., aes(x = chrom, y = log10_Total_FL, colour = TAMA)) + geom_point() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme + labs(y = "FL Reads (Log10)", x = "ERCC") 

p11 <- class %>%
  mutate(Total_FL = .[, 46:55] %>% rowSums(na.rm = TRUE)) %>%
  left_join(., tama, by = ("isoform")) %>% 
  mutate(log10_Total_FL = log10(Total_FL)) %>%
  ggplot(., aes(x = chrom, y = log10_Total_FL, colour = TAMA)) + geom_point() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme + labs(y = "FL Reads (Log10)", x = "ERCC") 

p19 

=======
TAMA <- "/gpfs/mrc0/projects/Research_Project-MRC148213/sl693/IsoSeq/Whole_Transcriptome/All_Samples/ERCC/TOFU_ADJUST/TAMA_Filter/"
tama_retained_adjusted <- read.table(paste0(TAMA,"All_Merged_postsqanti.bed")) %>% mutate(TAMA = "Retain") 
tama_discard <- read.table(paste0(TAMA,"All_Merged_postsqanti_discarded.txt")) %>% mutate(TAMA = "Discard")
tama <- bind_rows(tama_retained_adjusted, tama_discard) %>% mutate(isoform = word(.$V4, c(2),sep = ";"))

p19 <- tama %>% group_by(V1,TAMA) %>% tally() %>%
  ggplot(., aes(x = reorder(V1,-n), y = n, fill = TAMA)) + geom_bar(stat = "identity") + labs(y = "Number of Isoforms", x = "ERCC") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme

# reads filtered from TAMA 
# total FL reads across 10 samples associated with isoform 
p20 <- class[class$chrom %in% tama_discard$V1,] %>%
  mutate(Total_FL = .[, 46:55] %>% rowSums(na.rm = TRUE)) %>%
  left_join(., tama, by = ("isoform")) %>% 
  mutate(log10_Total_FL = log10(Total_FL)) %>%
  ggplot(., aes(x = chrom, y = log10_Total_FL, colour = TAMA)) + geom_point() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme + labs(y = "FL Reads (Log10)", x = "ERCC") 

p11 <- class %>%
  mutate(Total_FL = .[, 46:55] %>% rowSums(na.rm = TRUE)) %>%
  left_join(., tama, by = ("isoform")) %>% 
  mutate(log10_Total_FL = log10(Total_FL)) %>%
  ggplot(., aes(x = chrom, y = log10_Total_FL, colour = TAMA)) + geom_point() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  mytheme + labs(y = "FL Reads (Log10)", x = "ERCC") 

p19 

>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5

Correlation

>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5
## 
##  Pearson's product-moment correlation
## 
## data:  dat[[x.var]] and dat[[y.var]]
## t = 38.688, df = 55, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9697060 0.9894717
## sample estimates:
##      cor 
## 0.982118 
## 
## [1] ""
## [1] "Correlation betweenlog2_amount_of_ERCCandlog2_FL_reads"
## [1] "corr.value0.982117953981347"
## [1] "p.value1.41213399302975e-41"
<<<<<<< HEAD

=======

## png 
##   2
>>>>>>> c34b526e4f39c14e6ca6579cf2dd5726cda490a5